Load required packages

library(tidyverse)
library(phyloseq)
library(speedyseq)
library(ggrepel)
library(ampvis2)
library(plotly)
library(microbiome)
options(getClass.msg=FALSE) # https://github.com/epurdom/clusterExperiment/issues/66
#this fixes an error message that pops up because the class 'Annotated' is defined in two different packages

Load functions from Github

'%!in%' <- function(x,y)!('%in%'(x,y))

source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_taxa_tests.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_normalisation.R")
## Loading required package: scales
## 
## Attaching package: 'scales'
## The following object is masked from 'package:microbiome':
## 
##     alpha
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_alpha.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_beta.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_heatmap.R")

Load physeq object

ps = "data/processed/physeq_update_23_11.RDS"

ps %>% 
  here::here() %>%
  readRDS() %>%
  phyloseq_get_strains_fast() %>%
  phyloseq_remove_chloro_mitho() -> physeq
## Joining, by = "ASV"
physeq %>% 
  subset_samples(Experiment == "Continuous") %>% 
  subset_samples(Paul %!in% c("Paul")) %>%
  subset_samples(Reactor != "IR2") -> ps_PolyFermS

We will be analysing only the PolyFermS samples here so take a subset of the physeq object.

physeq %>% 
  subset_samples(Experiment == "Continuous") %>% 
  subset_samples(Paul %!in% c("Paul")) %>%
  subset_samples(Reactor != "IR2") -> ps_polyFermS

sample_data(ps_polyFermS)$Reactor <- fct_relevel(sample_data(ps_polyFermS)$Reactor, "IR1", "CR", "TR1", "TR2","TR3", "TR4", "TR5", "TR6") 

sample_data(ps_polyFermS)$Treatment <- fct_relevel(sample_data(ps_polyFermS)$Treatment, "UNTREATED",  "CTX+HV292.1", "CTX","HV292.1","VAN+CCUG59168", "VAN",  "CCUG59168") 

sample_data(ps_polyFermS)$Reactor_Treatment <- fct_relevel(sample_data(ps_polyFermS)$Reactor_Treatment, "IR1_UNTREATED","CR_UNTREATED", "CR_CTX", "CR_VAN", "TR1_CTX+HV292.1","TR2_CTX", "TR3_HV292.1", "TR5_VAN+CCUG59168", "TR4_VAN", "TR6_CCUG59168") 

ps_polyFermS %>% 
  rarefy_even_depth(sample.size = 4576,
                    rngseed = 123) -> ps_polyFermS_rare
## `set.seed(123)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(123); .Random.seed` for the full vector
## ...
## 16 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## CR-40-S166ETR1-30-S178ETR1-42-S194ETR2-30-S195IR1-40-S197
## ...
## 50OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...

Compute beta-div metrics:

ps_polyFermS_rare %>%
  phyloseq_compute_bdiv(norm = "pc",
                        phylo = TRUE,
                        seed = 123) -> bdiv_list
## Loading required package: ape
## Loading required package: GUniFrac
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
## 
## Attaching package: 'vegan'
## The following object is masked from 'package:microbiome':
## 
##     diversity
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
ps_polyFermS_rare  %>%
  subset_samples(Enrichment == "NotEnriched") %>%
  phyloseq_plot_bdiv(bdiv_list,
                     m = "CoDa",
                     seed = 123) -> coda
  
coda$PCA$layers[[1]] = NULL

coda$PCA + geom_point(size=2,
                   aes(color = Reactor_Treatment, 
                       fill = Reactor_Treatment,
                       shape = NULL,
                       alpha = Day_from_Inoculum)) + 
  theme_light() +
  geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.15, "inches")),
              size = 0.05, linetype = "dashed", inherit.aes = TRUE, aes(group=Reactor_Treatment, color = Reactor_Treatment), show.legend = FALSE) +
  scale_alpha_continuous(range=c( 0.9, 0.3)) + scale_color_viridis_d(na.value = "black") + 
  scale_fill_viridis_d(na.value = "black") + 
  # scale_shape_manual(values = c(8, 21, 22, 23, 24, 16, 15, 18, 17)) + 
  theme_classic() +
    labs(col=NULL, fill = NULL, shape = NULL) + guides(shape=FALSE) -> p1

p1

p1 %>%
  ggplotly() -> p1ply
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
p1ply
htmlwidgets::saveWidget(as_widget(p1ply), 
  paste0(here::here(),
                    "/data/processed/",
       "beta_",
       format(Sys.time(), "%Y%b%d"),".html"))

Visualize distance ‘trajectories’ :

We will use the library use dist to get it in the dataframe we want for plotting useDist

ps = coda$physeq_clr

ps %>% 
  otu_table() %>%
  vegan::vegdist(method = "eucl") -> mtest

mtest %>%
  usedist::dist_groups(ps %>% get_variable("Reactor_Treatment")) -> dist_df

ps %>% 
    sample_data() %>%
    data.frame() %>%
    rownames_to_column('tmpid') %>%
    dplyr::select(tmpid, Day_from_Inoculum) -> meta_df
left_join(
  dist_df,
  meta_df %>%
    dplyr::rename("varGroup1" = Day_from_Inoculum),
  by = c("Item1" = "tmpid")) %>%
  left_join(
    meta_df %>%
    dplyr::rename("varGroup2" = Day_from_Inoculum),
  by = c("Item2" = "tmpid")) -> test

Please carefully check if the code makes sense

test %>% 
  head()
##       Item1      Item2       Group1       Group2               Label Distance
## 1 CR-1-S116 CR-10-S245 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 20.82583
## 2 CR-1-S116 CR-13-S148 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 21.99395
## 3 CR-1-S116 CR-14-S336 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 21.56607
## 4 CR-1-S116 CR-15-S292 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 22.35332
## 5 CR-1-S116 CR-16-S332 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 24.76417
## 6 CR-1-S116  CR-17-S97 CR_UNTREATED CR_UNTREATED Within CR_UNTREATED 29.12162
##   varGroup1 varGroup2
## 1        24        33
## 2        24        36
## 3        24        37
## 4        24        38
## 5        24        39
## 6        24        40
test %>%
  dplyr::filter(grepl("Within", Label)) %>%
  dplyr::group_by(Label, Item1, Item2, varGroup1, varGroup2) %>%
  dplyr::arrange(varGroup1, varGroup2) %>%
  dplyr::mutate(firsto = dplyr::first(Distance)) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(Label, Item1, varGroup1) %>%
  dplyr::summarise(next_dist = first(firsto)) %>%
  dplyr::arrange(varGroup1, Label) -> test_prev
## `summarise()` regrouping output by 'Label', 'Item1' (override with `.groups` argument)
test_prev %>%
  DT::datatable()

Please carefully check if the code makes sense

test_prev %>%
  ggplot(aes(x = as.factor(varGroup1) , y = next_dist)) +
  geom_point(size=1, alpha=0.6, aes(colour = Label, group=Label),
            position=position_jitterdodge(dodge.width=0.9)) + 
  geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.1, "inches")),
              size = 0.4, linetype = "dashed", inherit.aes = TRUE, aes(group=Label, color = Label),
            position=position_jitterdodge(dodge.width=0.9)) +
  # geom_boxplot(aes(group = varGroup2 %>% as.factor()),
               # fill = "transparent",
               # outlier.colour = NA,alpha=0.4) +
  facet_grid(Label ~ ., scales = "fixed") +
      # ggrepel::geom_text_repel(cex=2,
      #                      aes(label= Group1),
      #                      segment.color = 'black',
      #                      segment.size = 0.5,
      #                      # nudge_x =  -4,
      #                      # nudge_y = 0,
      #                      df_all_t0 %>% filter(Day2 == 6 & metric=="wunifrac") %>% unique()) +
  theme_bw() + xlab("Day") + ylab("Distance to prev") +
    geom_vline(xintercept = c(23,39), 
             color="black", alpha=0.4) -> tmp

print(tmp)

tmp + facet_null()

ref = 24

test %>%
  dplyr::filter(grepl("Within", Label)) %>%
  dplyr::group_by(Label) %>%
  dplyr::arrange(varGroup1, varGroup2) %>%
  # dplyr::mutate(firsto = dplyr::first(Distance)) %>%
  dplyr::filter(varGroup1 == !!ref &
                varGroup2 >  !!ref) %>%
    dplyr::arrange(varGroup2) -> test_ref

Please carefully check if the code/rationale makes sense

I run some checks for the distance to ref:

test_ref %>%
  DT::datatable()
sample_to_check <- c("CR-1-S116", "CR-3-S98", "IR1-24-S295", "IR1-81-S298")

subset_samples(ps, 
               sample_names(ps) %in% sample_to_check) %>%
  sample_data() %>%
  data.frame() %>%
  dplyr::select(Sample_description,Day_of_Connection, Day_of_Treatment, Day_from_Inoculum) %>%
  DT::datatable()
mtest %>%
  as.matrix() -> mtest_mat

mtest_mat[sample_to_check,sample_to_check]
##             CR-1-S116 CR-3-S98 IR1-24-S295 IR1-81-S298
## CR-1-S116     0.00000 11.34670    15.47180    24.69989
## CR-3-S98     11.34670  0.00000    15.15914    24.46117
## IR1-24-S295  15.47180 15.15914     0.00000    24.52264
## IR1-81-S298  24.69989 24.46117    24.52264     0.00000

Please carefully check

test_ref %>%
  ggplot(aes(x = as.factor(varGroup2) , y = Distance)) +
  geom_point(size=1, alpha=0.6, aes(colour = Group1, group=Group1),
            position=position_jitterdodge(dodge.width=0.9)) + 
  geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.1, "inches")),
              size = 0.4, linetype = "dashed", inherit.aes = TRUE, aes(group=Group1, color = Group1),
            position=position_jitterdodge(dodge.width=0.9)) +
  # geom_boxplot(aes(group = varGroup2 %>% as.factor()),
               # fill = "transparent",
               # outlier.colour = NA,alpha=0.4) +
  facet_grid(Label ~ ., scales = "fixed") +
      # ggrepel::geom_text_repel(cex=2,
      #                      aes(label= Group1),
      #                      segment.color = 'black',
      #                      segment.size = 0.5,
      #                      # nudge_x =  -4,
      #                      # nudge_y = 0,
      #                      df_all_t0 %>% filter(Day2 == 6 & metric=="wunifrac") %>% unique()) +
  theme_bw() + xlab("Day") + ylab(paste0("Distance to ref :", ref)) +
    geom_vline(xintercept = c(23,39),
             color="black", alpha=0.4) -> tmp


print(tmp + theme(legend.position = "none") )

Not sure why there is two values for day 26. Not sure why the vertical line is 61 here since I used the same command as the previous plot…

tmp + facet_null()

Next step: ref (should be easy with the dplyr::filter(grepl(“Within”, Label)))

paste0(here::here(),
       "/data/processed/",
       "beta",
       "_",
       format(Sys.time(), "%Y%b%d")
       ,".RData") %>% save.image()
# load("/Users/fconstan/Projects/EZe/ASV/260420.RData")
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] GUniFrac_1.1       matrixStats_0.57.0 vegan_2.5-7        lattice_0.20-41   
##  [5] permute_0.9-5      ape_5.4-1          reshape2_1.4.4     scales_1.1.1      
##  [9] microbiome_2.1.26  plotly_4.9.2.2     ampvis2_2.6.4      ggrepel_0.9.0     
## [13] speedyseq_0.3.0    phyloseq_1.30.0    forcats_0.5.0      stringr_1.4.0     
## [17] dplyr_1.0.2        purrr_0.3.4        readr_1.4.0        tidyr_1.1.2       
## [21] tibble_3.0.4       ggplot2_3.3.3      tidyverse_1.3.0   
## 
## loaded via a namespace (and not attached):
##  [1] Rtsne_0.15          colorspace_2.0-0    ellipsis_0.3.1     
##  [4] rprojroot_2.0.2     XVector_0.26.0      fs_1.5.0           
##  [7] rstudioapi_0.13     farver_2.0.3        DT_0.16            
## [10] ggnet_0.1.0         fansi_0.4.1         lubridate_1.7.9.2  
## [13] xml2_1.3.2          codetools_0.2-18    splines_3.6.3      
## [16] doParallel_1.0.16   knitr_1.30          ade4_1.7-16        
## [19] jsonlite_1.7.2      broom_0.7.3         cluster_2.1.0      
## [22] dbplyr_2.0.0        compiler_3.6.3      httr_1.4.2         
## [25] backports_1.2.1     assertthat_0.2.1    Matrix_1.3-0       
## [28] lazyeval_0.2.2      cli_2.2.0           htmltools_0.5.0    
## [31] prettyunits_1.1.1   tools_3.6.3         igraph_1.2.6       
## [34] gtable_0.3.0        glue_1.4.2          Rcpp_1.0.5         
## [37] Biobase_2.46.0      cellranger_1.1.0    vctrs_0.3.6        
## [40] Biostrings_2.54.0   zCompositions_1.3.4 multtest_2.42.0    
## [43] nlme_3.1-151        crosstalk_1.1.0.1   iterators_1.0.13   
## [46] xfun_0.19           network_1.16.1      rvest_0.3.6        
## [49] lifecycle_0.2.0     zlibbioc_1.32.0     MASS_7.3-53        
## [52] hms_0.5.3           parallel_3.6.3      biomformat_1.14.0  
## [55] rhdf5_2.30.1        RColorBrewer_1.1-2  yaml_2.2.1         
## [58] NADA_1.6-1.1        stringi_1.5.3       S4Vectors_0.24.4   
## [61] foreach_1.5.1       BiocGenerics_0.32.0 truncnorm_1.0-8    
## [64] rlang_0.4.10        pkgconfig_2.0.3     evaluate_0.14      
## [67] Rhdf5lib_1.8.0      labeling_0.4.2      patchwork_1.1.1    
## [70] htmlwidgets_1.5.3   tidyselect_1.1.0    here_1.0.1         
## [73] plyr_1.8.6          magrittr_2.0.1      R6_2.5.0           
## [76] IRanges_2.20.2      generics_0.1.0      DBI_1.1.0          
## [79] pillar_1.4.7        haven_2.3.1         withr_2.3.0        
## [82] mgcv_1.8-33         survival_3.2-7      modelr_0.1.8       
## [85] crayon_1.3.4        rmarkdown_2.6       progress_1.2.2     
## [88] grid_3.6.3          readxl_1.3.1        data.table_1.13.6  
## [91] reprex_0.3.0        digest_0.6.27       usedist_0.4.0      
## [94] stats4_3.6.3        munsell_0.5.0       viridisLite_0.3.0